// Trade-Policy Dynamics: Evidence from 60 Years of U.S.-China Trade
// Alessandria, Khan, Khederlarian, Ruhl, and Steinberg

// inputs:	HTS_TSUS.xlsx
	
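// outputs:	tsus_to_new.dta, hts_to_new.dta
//			(also saved along the way: hts_tsus_1to1.dta, hts_tsus_1ton.dta,
//			 hts_tsus_nto1.dta, hts_tsus_nton.dta)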

// This code builds the concordance between tsusa codes and hts codes. It is used
// later to construct a long time series at a finer disaggregation than the one
// used in the main text. 

// Session setup: empty workspace, no variable-name abbreviation.
clear all
set varabbrev off

// Folder globals used by every load/save in this file.
global dir_raw "../01 Raw data/"
global dir_int "../20 Intermediate files/"


// Load the HTS-TSUS crosswalk (first row of Sheet1 holds the variable names).
import excel using "${dir_raw}HTS_TSUS.xlsx", sheet("Sheet1") firstrow

// Number of crosswalk rows attached to each HTS code and to each TSUS code.
gen tmp_one = 1
bysort HTS: egen tmp_rows_hts = total(tmp_one)
bysort TSUS: egen tmp_rows_tsus = total(tmp_one)

// Classify every row by the multiplicity of its HTS and of its TSUS:
//   1to1 : HTS appears once,     TSUS appears once
//   1ton : HTS appears > once,   TSUS appears once
//   nto1 : HTS appears once,     TSUS appears > once
//   nton : both appear more than once
// "." is the fallback when none of the four conditions holds.
gen rel = cond(tmp_rows_hts==1 & tmp_rows_tsus==1, "1to1", ///
          cond(tmp_rows_hts>1  & tmp_rows_tsus==1, "1ton", ///
          cond(tmp_rows_hts==1 & tmp_rows_tsus>1,  "nto1", ///
          cond(tmp_rows_hts>1  & tmp_rows_tsus>1,  "nton", "."))))

// Negated counts, so that an ascending sort lists the highest counts first.
gen tmp_neg_rows_tsus = -tmp_rows_tsus
gen tmp_neg_rows_hts = -tmp_rows_hts

// Any row that shares its HTS with an "nton" row becomes "nton" as well.
// Within an HTS the rows with the largest TSUS count come first, and replace
// runs top to bottom, so the label carries down through the whole HTS group.
sort HTS tmp_neg_rows_tsus
replace rel = "nton" if rel!="nton" & HTS==HTS[_n-1] & rel[_n-1]=="nton"

// Same propagation along the TSUS dimension.
sort TSUS tmp_neg_rows_hts
replace rel = "nton" if rel!="nton" & TSUS==TSUS[_n-1] & rel[_n-1]=="nton"

tabulate rel

// Write one HTS-TSUS pair file per relationship type; the full classified
// dataset is put back in memory after each save.
foreach x in 1to1 nto1 1ton nton {
	preserve
		keep HTS TSUS rel
		keep if rel=="`x'"
		drop rel
		save "${dir_int}hts_tsus_`x'.dta", replace
	restore
}


// Group the n-to-n HTS-TSUS pairs into "families": the loop below runs until
// every HTS code and every TSUS code is attached to exactly one family. The
// result (HTS, TSUS, family_n) overwrites hts_tsus_nton.dta.
use "${dir_int}hts_tsus_nton.dta", clear

// family_hts: start from a numeric code per distinct HTS, then, within each
// TSUS, let every row inherit the value of the row above it (replace runs top
// to bottom, so all rows of a TSUS end up with the value of its first row).
encode HTS, g(family_hts)
sort TSUS HTS
replace family_hts=family_hts[_n-1] if TSUS==TSUS[_n-1]

// family_tsus: second seed grouping.
// NOTE(review): this encodes HTS again and compares TSUS after sorting by
// HTS TSUS, i.e. it is not the mirror image of the family_hts step (that would
// be encode TSUS / HTS==HTS[_n-1]). Possibly a copy-paste. The while loop
// below keeps merging until each HTS and each TSUS has a single family, so the
// final grouping presumably does not depend on this -- confirm before changing,
// since a change here could alter which family number each group receives.
encode HTS, g(family_tsus)
sort HTS TSUS
replace family_tsus=family_tsus[_n-1] if TSUS==TSUS[_n-1]

// Initial family label: family_hts, merged with the row above whenever the two
// rows share either seed grouping (one pass ordered by family_tsus, one pass
// ordered by family_hts).
g family = family_hts
sort family_tsus family
replace family=family[_n-1] if family_tsus==family_tsus[_n-1] | family_hts==family_hts[_n-1]
sort family_hts family
replace family=family[_n-1] if family_tsus==family_tsus[_n-1] | family_hts==family_hts[_n-1]
// count_hts = largest number of distinct families any single HTS belongs to.
egen temptaghts = tag(HTS family)
bysort HTS: egen tempsumhts = sum(temptaghts)
sum tempsumhts, d
loc count_hts = r(max)
// count_tsus = largest number of distinct families any single TSUS belongs to.
egen temptagtsus = tag(TSUS family)
bysort TSUS: egen tempsumtsus = sum(temptagtsus)
sum tempsumtsus, d
loc count_tsus = r(max)
// tempsum = minus the number of rows in the family, so that an ascending sort
// puts rows from the largest family first.
g temp=1
bysort family: egen tempsum=sum(temp)
replace tempsum=-tempsum

// Merge families until no HTS and no TSUS is split across several families.
// NOTE(review): the sort keys below do not pin the order of rows that tie on
// (TSUS, tempsum) or (HTS, tempsum); which family label survives a merge may
// depend on that order -- confirm whether the resulting numbering matters.
while `count_tsus'>1 | `count_hts'>1 {
// Within each TSUS, every row adopts the family of the row above it; the
// first row of the TSUS belongs to the largest family present.
sort TSUS tempsum
replace family=family[_n-1] if TSUS==TSUS[_n-1]  & family!=family[_n-1]
// Family sizes changed: rebuild tempsum before the HTS pass.
drop temp*
g temp=1
bysort family: egen tempsum=sum(temp)
replace tempsum=-tempsum
// Same merge step within each HTS.
sort HTS tempsum
replace family=family[_n-1] if HTS==HTS[_n-1]  & family!=family[_n-1]
drop temp*
// Recompute the two stopping statistics.
egen temptaghts = tag(HTS family)
bysort HTS: egen tempsumhts = sum(temptaghts)
qui sum tempsumhts, d
loc count_hts = r(max)
egen temptagtsus = tag(TSUS family)
bysort TSUS: egen tempsumtsus = sum(temptagtsus)
qui sum tempsumtsus, d
loc count_tsus = r(max)
// Refresh the (negated) family sizes for the next iteration.
g temp=1
bysort family: egen tempsum=sum(temp)
replace tempsum=-tempsum
// Progress output: both values are 1 on the final iteration.
di `count_hts'
di `count_tsus'
}
drop temp*
// Renumber the surviving family labels consecutively (1, 2, ...).
egen family_n = group(family)
keep HTS TSUS family_n
save "${dir_int}hts_tsus_nton.dta", replace

////////////////////////////////////////////////////////////////////////////////
// Generate the files mapping HTS codes and TSUS codes to the new family code
//
// The four relationship files are stacked into one crosswalk and every row
// receives a string family code made of the relationship type followed by a
// group number, zero-padded on the left to four characters:
//   1to1 / 1ton : one family per HTS code
//   nto1        : one family per TSUS code
//   nton        : family_n as stored in hts_tsus_nton.dta
// The labelled crosswalk is built once; hts_to_new.dta and tsus_to_new.dta are
// then both cut from it (the two outputs previously repeated the identical
// assembly code).

// --- 1-to-1 pairs ------------------------------------------------------------
use "${dir_int}hts_tsus_1to1.dta", clear
sort HTS
egen temp = group(HTS)
tostring temp, replace
// Left-pad with zeros up to four characters; longer strings are left as is.
replace temp = substr("000", 1, 4 - length(temp)) + temp if inrange(length(temp), 1, 3)
g family_n1 = "1to1" + temp
drop temp

// --- 1-to-n (numbered by HTS) and n-to-1 (numbered by TSUS) ------------------
// Rows that were just appended are the ones whose family_n1 is still empty.
local key_1ton HTS
local key_nto1 TSUS
foreach rel in 1ton nto1 {
	append using "${dir_int}hts_tsus_`rel'.dta"
	egen temp = group(`key_`rel'') if family_n1==""
	tostring temp, replace
	replace temp = substr("000", 1, 4 - length(temp)) + temp if inrange(length(temp), 1, 3)
	replace family_n1 = "`rel'" + temp if family_n1==""
	drop temp
}

// --- n-to-n (family number already stored in family_n) -----------------------
append using "${dir_int}hts_tsus_nton.dta"
tostring family_n, g(temp)
replace temp = substr("000", 1, 4 - length(temp)) + temp if inrange(length(temp), 1, 3)
replace family_n1 = "nton" + temp if family_n1==""
drop temp family_n

rename family_n1 family

// --- HTS -> family -----------------------------------------------------------
// Keep one row per distinct (HTS, family) pair, ordered by HTS.
preserve
	egen temptag = tag(HTS family)
	sort HTS
	drop if temptag==0
	keep HTS family

	unique HTS
	unique family
	save "${dir_int}hts_to_new.dta", replace
restore

// --- TSUS -> family ----------------------------------------------------------
// Keep one row per distinct (TSUS, family) pair, ordered by TSUS. This is the
// dataset left in memory when the script ends.
egen temptag = tag(TSUS family)
sort TSUS
drop if temptag==0
keep TSUS family

unique TSUS
unique family
save "${dir_int}tsus_to_new.dta", replace
